knitr document van Steensel lab

Epigenetic Screening

Introduction

This script tries to find out which drugs are able to significantly alter indel pattern formation in RSTP2#5 cells in different chromatin contexts. In the end, the aim is to couple biological relevance to these changes.

Description of Data

How to make a good rendering table:

column1 column2 column3
1 2 3
a b c

Data processing

Path, Libraries, Parameters and Useful Functions

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.0.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================
## ========================================
## circlize version 0.4.6
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Loading required package: graph
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unsplit, which,
##     which.max, which.min
## 
## Attaching package: 'graph'
## The following object is masked from 'package:circlize':
## 
##     degree
## 
## Loading required package: futile.logger
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:VennDiagram':
## 
##     rotate
## 
## Attaching package: 'Laurae'
## The following object is masked from 'package:data.table':
## 
##     setDF

Bring dataframe in suitable format for heatmap function

# Split up per concentration
indel.data.high <- indel.data[grep("10um", indel.data$condition),]
indel.data.med <- indel.data[grep("1um", indel.data$condition),]
indel.data.low <- indel.data[grep("100nm", indel.data$condition),]

# Compute mean of logratio per barcode and drug
indel.data.high$mean.logratio.bc.drug <- 
  ave(indel.data.high$logratio.platenorm, 
      indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.logratio.bc.drug <- 
  ave(indel.data.med$logratio.platenorm, 
      indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.logratio.bc.drug <- 
  ave(indel.data.low$logratio.platenorm, 
      indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))

# Compute mean of logratio per barcode and target
indel.data.high$mean.logratio.bc.target <- 
  ave(indel.data.high$logratio.platenorm, 
      indel.data.high$barcode, indel.data.high$Target, FUN = function(x) mean(x))
indel.data.med$mean.logratio.bc.target <- 
  ave(indel.data.med$logratio.platenorm, 
      indel.data.med$barcode, indel.data.med$Target, FUN = function(x) mean(x))
indel.data.low$mean.logratio.bc.target <- 
  ave(indel.data.low$logratio.platenorm, 
      indel.data.low$barcode, indel.data.low$Target, FUN = function(x) mean(x))

# Compute mean of t-statistic per barcode and target
indel.data.high$mean.statistic.bc.target <- 
  ave(indel.data.high$statistic, 
      indel.data.high$barcode, indel.data.high$Target, FUN = function(x) mean(x))
indel.data.med$mean.statistic.bc.target <- 
  ave(indel.data.med$statistic, 
      indel.data.med$barcode, indel.data.med$Target, FUN = function(x) mean(x))
indel.data.low$mean.statistic.bc.target <- 
  ave(indel.data.low$statistic, 
      indel.data.low$barcode, indel.data.low$Target, FUN = function(x) mean(x))

# Compute mean of t-statistic per barcode and drug
indel.data.high$mean.statistic.bc.drug <- 
  ave(indel.data.high$statistic, 
      indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.statistic.bc.drug <- 
  ave(indel.data.med$statistic, 
      indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.statistic.bc.drug <- 
  ave(indel.data.low$statistic, 
      indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))

# Compute mean of efficiency per barcode and drug
indel.data.high$mean.eff.bc.drug <- 
  ave(indel.data.high$efficiency.platenorm, 
      indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.eff.bc.drug <- 
  ave(indel.data.med$efficiency.platenorm, 
      indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.eff.bc.drug <- 
  ave(indel.data.low$efficiency.platenorm, 
      indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))


# Compute mean and sd of barcode counts per barcode and drug
indel.data.high$mean.count.bc.drug <- 
  ave(indel.data.high$rel.counts, 
      indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.high$sd.count.bc.drug <- 
  ave(indel.data.high$rel.counts, 
      indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) sd(x))
indel.data.med$mean.count.bc.drug <- 
  ave(indel.data.med$rel.counts, 
      indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.med$sd.count.bc.drug <- 
  ave(indel.data.med$rel.counts, 
      indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) sd(x))
indel.data.low$mean.count.bc.drug <- 
  ave(indel.data.low$rel.counts, 
      indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))
indel.data.low$sd.count.bc.drug <- 
  ave(indel.data.low$rel.counts, 
      indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) sd(x))

Generating heatmaps per barcode

## Using barcode as id variables
## Using barcode as id variables
## Using barcode as id variables
## Using barcode as id variables
## Using barcode as id variables
## Using barcode as id variables

Annotate heatmap columns and rows

## Warning in `!=.default`(target$Target, c("Negative Control", "MRN", "DNA-
## PK")): longer object length is not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
target$random <- c(1:156)
row.names(target) <- target$Drug
target2 <- target
target <- target[,c(-1,-3)]
target <- as.data.frame(target)
row.names(target) <- target2$Drug


# Add type of chr. mark (repressive or active)
type <- mapping_mean_long[,c(2,4)]
type <- unique(type)
type$random <- c(1:17)
row.names(type) <- type$variable
type2 <- type
type <- type[,c(-1,-3)]
type <- as.data.frame(type)
row.names(type) <- type2$variable

# Add annotation of barcodes (based on clustering in 0_Clone5_Annotation.Rmd)
barcode.annotation <- as.data.frame(indel.data.high[,1])
barcode.annotation <- unique(barcode.annotation)
barcode.annotation <- barcode.annotation %>% 
  remove_rownames %>% column_to_rownames(var="indel.data.high[, 1]")
barcode.annotation$cluster[c(18,2)] <- "Active Promoter/Enhancer"
barcode.annotation$cluster[c(12,8)] <- "H3K9me2/3 Domain"
barcode.annotation$cluster[c(3,14,4,17)] <- "Active Gene Body"
barcode.annotation$cluster[c(5,13,6,15)] <- "LAD"
barcode.annotation$cluster[c(7,16)] <- "No Marks Present"
barcode.annotation$cluster[c(1,10,9,11)] <- "H3K27me3 Domain"


# Change colors of annotations
colors <- brewer.pal(length(unique(barcode.annotation$cluster)), "Pastel2")
names(colors) <- unique(barcode.annotation$cluster)

annotation_colors_scr = list(
  cluster = colors,
  type = c(Active="#669966", Repressive="grey90"),
  target = c(HDAC="#60988D", HAT="#C6D0A8", Sirtuin="#8FCAAC",
             HMT="#FDCDAC", DNMT="#D69C81", `Histone Demethylase`="#FFF2AE",
             HIF="#E3DCD5", JAK="#C8D7E2", PIM="#9AB8C4", `Aurora Kinase`="#F4CAE4",
             PARP="#CB546F", `Epigenetic Reader Domain`= "#476D61", `DNA-PK`="#E07A43"
             ))



# Order the annotation legend
ordered_marks <- c("Active Promoter/Enhancer", "Active Gene Body", "No Marks Present", "H3K27me3 Domain",  "H3K9me2/3 Domain", "LAD")
annotation_colors_scr$cluster <- annotation_colors_scr$cluster[ordered_marks]

ordered_marks2 <- c("DNA-PK","Epigenetic Reader Domain", "HDAC",  "Sirtuin", "HAT", 
                    "Histone Demethylase", "HMT", "DNMT", 
                    "HIF", "JAK", "PIM",
                    "Aurora Kinase", "PARP")
annotation_colors_scr$target <- annotation_colors_scr$target[ordered_marks2]

Next, I want to create heatmaps to visualize the effect of each individual drug on each of the 18 barcodes.

Results

logratio heatmap of each individual drug vs. barcode

## This heatmap contains too much information -> We need to decrease the number of drugs ## Only drugs that change the logratio

## Read count heatmap

Efficiency/logratio overlay

## (polygon[GRID.polygon.194], polygon[GRID.polygon.195], polygon[GRID.polygon.196], polygon[GRID.polygon.197], text[GRID.text.198], text[GRID.text.199], text[GRID.text.200], text[GRID.text.201], text[GRID.text.202])

Next step is to import the chromatin data to generate a correlation matrix between the logratio change of each drug and the chromatin data. A correlation is calculated by comparing logratio changes of drug A for each barcode with ChIP data of ChIP dataset A for each barcode etc.

Correlation between ChIP data and logratio changes

## Using ChIP as id variables

# There are some interesting drug-induced changes that can be explained by certain chromatin marks - this can be looked at in more detail

Cluster-specific effects: now, instead of correlating each chromatin mark against drug-induced change, I try to correlate the drug-induced change against the chromatin clusters

## Using 'cluster' as value column. Use 'value.var' to override
clusters2 <- clusters2 %>% 
  remove_rownames %>% column_to_rownames(var="unique(indel.data$barcode)")

clusters2[!is.na(clusters2)] <- 1
clusters2[is.na(clusters2)] <- 0

clusters2$Active_G.B. <- as.numeric(clusters2$Active_G.B.)
clusters2$Active_P.E. <- as.numeric(clusters2$Active_P.E.)
clusters2$H3K27 <- as.numeric(clusters2$H3K27)
clusters2$H3K9 <- as.numeric(clusters2$H3K9)
clusters2$LAD <- as.numeric(clusters2$LAD)
clusters2$No <- as.numeric(clusters2$No)


cor.clusters.high <- cor(clusters2, indel.data.high.drug)
cor.clusters.med <- cor(clusters2, indel.data.med.drug)
cor.clusters.low <- cor(clusters2, indel.data.low.drug)

cor.clusters.high.eff <- cor(clusters2, indel.data.high.eff)
cor.clusters.med.eff <- cor(clusters2, indel.data.med.eff)
cor.clusters.low.eff <- cor(clusters2, indel.data.low.eff)

# Take only drugs that have correlation of absolute > 0.5 with at least one ChIP dataset
cor.clusters.high <-
  cor.clusters.high[,
                      colSums(cor.clusters.high > 0.6) >= 1 |
                        colSums(cor.clusters.high < -0.6) >= 1]

cor.clusters.med <-
  cor.clusters.med[,
                      colSums(cor.clusters.med > 0.6) >= 1 |
                        colSums(cor.clusters.med < -0.6) >= 1]

cor.clusters.low <-
  cor.clusters.low[,
                      colSums(cor.clusters.low > 0.6) >= 1 |
                        colSums(cor.clusters.low < -0.6) >= 1]

cor.clusters.high.eff <-
  cor.clusters.high.eff[,
                      colSums(cor.clusters.high.eff > 0.6) >= 1 |
                        colSums(cor.clusters.high.eff < -0.6) >= 1]

cor.clusters.med.eff <-
  cor.clusters.med.eff[,
                      colSums(cor.clusters.med.eff > 0.6) >= 1 |
                        colSums(cor.clusters.med.eff < -0.6) >= 1]

cor.clusters.low.eff <-
  cor.clusters.low.eff[,
                      colSums(cor.clusters.low.eff > 0.6) >= 1 |
                        colSums(cor.clusters.low.eff < -0.6) >= 1]

# plotting the correlations
cor.pheatmap <- 
  list(cor.clusters.high, cor.clusters.med, cor.clusters.low)
cor.pheatmap.eff <- 
  list(cor.clusters.high.eff, cor.clusters.med.eff, cor.clusters.low.eff)



for(i in 1:(length(cor.pheatmap))) {
  data <- cor.pheatmap[[i]]
p <- pheatmap(data, 
         annotation_col = target,
         annotation_row = type,
         color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         annotation_colors = annotation_colors_scr, annotation_legend = F,
         main = paste(i,"correlation heatmap: ChiP data vs. logratio change per drug"))
print(p)
}

## [1] 0.08515303
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## cluster      5  2.370  0.4741    5.77 0.000759 ***
## Residuals   30  2.465  0.0822                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Again, some interesting stuff

HDAC vs Integration 17

Correlation plots individual drugs

HDAC correlation

## Using 'logratio.platenorm' as value column. Use 'value.var' to override
## Using ChIP as id variables

Drug dose response

Exporting potential data.

Session Info

## [1] "Run time:  16.3733 secs"
## [1] "/DATA/usr/m.trauernicht/projects/EpiScreen/epigenetic-screening-on-trip-clone"
## [1] "Tue Sep  3 16:05:55 2019"
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] Laurae_0.0.0.9001    ggpubr_0.2           magrittr_1.5        
##  [4] tibble_2.1.1         ggforce_0.2.2        VennDiagram_1.6.20  
##  [7] futile.logger_1.4.3  tidyr_0.8.3          Pigengene_1.10.0    
## [10] graph_1.62.0         BiocGenerics_0.30.0  RColorBrewer_1.1-2  
## [13] gridExtra_2.3        gridGraphics_0.4-0   pheatmap_1.0.12     
## [16] circlize_0.4.6       ComplexHeatmap_2.0.0 data.table_1.12.2   
## [19] ggbeeswarm_0.6.0     ggplot2_3.1.1        outliers_0.14       
## [22] dplyr_0.8.1         
## 
## loaded via a namespace (and not attached):
##  [1] Cubist_0.2.2          colorspace_1.4-1      rjson_0.2.20         
##  [4] dynamicTreeCut_1.63-1 htmlTable_1.13.1      GlobalOptions_0.1.0  
##  [7] base64enc_0.1-3       clue_0.3-57           rstudioapi_0.10      
## [10] farver_1.1.0          bit64_0.9-7           AnnotationDbi_1.46.0 
## [13] mvtnorm_1.0-10        codetools_0.2-16      splines_3.6.1        
## [16] doParallel_1.0.14     impute_1.58.0         robustbase_0.93-5    
## [19] libcoin_1.0-4         knitr_1.22            polyclip_1.10-0      
## [22] Formula_1.2-3         WGCNA_1.67            cluster_2.0.9        
## [25] GO.db_3.8.2           png_0.1-7             rrcov_1.4-7          
## [28] compiler_3.6.1        backports_1.1.4       assertthat_0.2.1     
## [31] Matrix_1.2-17         lazyeval_0.2.2        tweenr_1.0.1         
## [34] formatR_1.6           acepack_1.4.1         htmltools_0.3.6      
## [37] tools_3.6.1           partykit_1.2-3        gtable_0.3.0         
## [40] glue_1.3.1            reshape2_1.4.3        Rcpp_1.0.1           
## [43] Biobase_2.44.0        preprocessCore_1.46.0 iterators_1.0.10     
## [46] inum_1.0-1            xfun_0.7              fastcluster_1.1.25   
## [49] stringr_1.4.0         DEoptimR_1.0-8        MASS_7.3-51.4        
## [52] scales_1.0.0          lambda.r_1.2.3        yaml_2.2.0           
## [55] C50_0.1.2             memoise_1.1.0         rpart_4.1-15         
## [58] latticeExtra_0.6-28   stringi_1.4.3         RSQLite_2.1.1        
## [61] S4Vectors_0.22.0      pcaPP_1.9-73          foreach_1.4.4        
## [64] checkmate_1.9.3       shape_1.4.4           rlang_0.3.4          
## [67] pkgconfig_2.0.2       matrixStats_0.54.0    evaluate_0.13        
## [70] lattice_0.20-38       purrr_0.3.2           labeling_0.3         
## [73] htmlwidgets_1.3       bit_1.1-14            tidyselect_0.2.5     
## [76] robust_0.4-18         plyr_1.8.4            R6_2.4.0             
## [79] IRanges_2.18.0        Hmisc_4.2-0           fit.models_0.5-14    
## [82] DBI_1.0.0             pillar_1.4.0          foreign_0.8-71       
## [85] withr_2.1.2           survival_2.44-1.1     nnet_7.3-12          
## [88] crayon_1.3.4          bnlearn_4.4.1         futile.options_1.0.1 
## [91] rmarkdown_1.12        GetoptLong_0.1.7      blob_1.1.1           
## [94] Rgraphviz_2.28.0      digest_0.6.18         stats4_3.6.1         
## [97] munsell_0.5.0         beeswarm_0.2.3        vipor_0.4.5